Exploratory Data Analysis¶

Importing necessary Libraries

In [1]:
import warnings
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

from  sklearn.decomposition import  PCA
from sklearn import preprocessing
from sklearn import tree
from sklearn.manifold import TSNE
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn import metrics
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.model_selection import train_test_split


pd.set_option('display.max_columns',None)
warnings.filterwarnings('ignore')
%matplotlib inline 

Loading the train and test data residing in the project folder

In [2]:
data_train = pd.read_csv('/Users/harshitchandrol/Documents/SEM 2 /Advance Data Mining /Project /train.csv')
data_test = pd.read_csv('/Users/harshitchandrol/Documents/SEM 2 /Advance Data Mining /Project /test.csv')
In [3]:
print(data_train.shape)
(7352, 563)
In [4]:
print(data_test.shape)
(2947, 563)
In [5]:
data_train['subject'].nunique()
Out[5]:
21
In [6]:
data_test['subject'].nunique()
Out[6]:
9

the number of subject for which the observtions are captures in the train and test data are 21 and 9 respectively. Hence, we can say that the train and test data is split into 7:3 ratio.

Data Cleaning¶

In [7]:
#Duplicate Values
print('Duplicate values in train data: {}'.format(sum(data_train.duplicated())))
print('Duplicate values in test data: {}'.format(sum(data_test.duplicated())))
Duplicate values in train data: 0
Duplicate values in test data: 0

we have no duplicate observation in our train and test data. (The data is already engineered)

In [8]:
# Chancing for NaN and Null values
print('NaN/Null values in train data: {}'.format(data_train.isnull().sum()))
print('NaN/Null values in test data: {}'.format(data_test.isnull().sum()))
NaN/Null values in train data: tBodyAcc-mean()-X       0
tBodyAcc-mean()-Y       0
tBodyAcc-mean()-Z       0
tBodyAcc-std()-X        0
tBodyAcc-std()-Y        0
                       ..
angle(X,gravityMean)    0
angle(Y,gravityMean)    0
angle(Z,gravityMean)    0
subject                 0
Activity                0
Length: 563, dtype: int64
NaN/Null values in test data: tBodyAcc-mean()-X       0
tBodyAcc-mean()-Y       0
tBodyAcc-mean()-Z       0
tBodyAcc-std()-X        0
tBodyAcc-std()-Y        0
                       ..
angle(X,gravityMean)    0
angle(Y,gravityMean)    0
angle(Z,gravityMean)    0
subject                 0
Activity                0
Length: 563, dtype: int64

as the data is pre engineered, we dont have any missing values as well. if it had then we would have removed the missing values using '.dropna()' method.

Exploratory Data Analysis¶

Cheking data imbalance¶
In [9]:
sns.set_style('whitegrid')
plt.rcParams['font.family'] = 'Dejavu Sans'

plt.figure(figsize=(20,10))
plt.title('Data provided by each subject',fontsize=20)
sns.color_palette("pastel")
sns.countplot(x='subject',hue='Activity',data=data_train)
plt.show()

Same number of observation cn be seen by each candidate

How is the Activity distributed¶
In [10]:
plt.title('No of Datapoints per Activity', fontsize=15)
sns.countplot(data_train.Activity)
sns.color_palette("dark")
plt.xticks(rotation=90)
plt.show()
In [11]:
import plotly
import plotly.graph_objects as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import matplotlib as mlt

# Plotting data
label_counts = data_train['Activity'].value_counts()

# Get colors
n = label_counts.shape[0]
colormap = plt.get_cmap('viridis')
colors = [mlt.colors.to_hex(colormap(col)) for col in np.arange(0, 1.01, 1/(n-1))]

# Create plot
data = go.Bar(x = label_counts.index,
              y = label_counts,
              marker = dict(color = colors))

layout = go.Layout(title = 'Smartphone Activity Distribution',
                   xaxis = dict(title = 'Activity'),
                   yaxis = dict(title = 'Count'))

fig = go.Figure(data=[data], layout=layout)
fig.show()
#iplot(fig)

Featuring Engineering from Domain Knowledge¶

Static and Dynamic Activities

In static activities (sit, stand, lie down) motion information will not be very useful. In the dynamic activities (Walking, WalkingUpstairs,WalkingDownstairs) motion info will be significa

In [12]:
sns.set_palette("Set1", desat=0.80)
facetgrid = sns.FacetGrid(data_train, hue='Activity', height=5,aspect=2)
facetgrid.map(sns.distplot,'tBodyAccMag-mean()', hist=False)\
    .add_legend()
plt.annotate("Stationary Activities", xy=(-0.960,12), xytext=(-0.5, 15), size=20,\
            va='center', ha='left',\
            arrowprops=dict(arrowstyle="simple",connectionstyle="arc3,rad=0.1"))

plt.annotate("Moving Activities", xy=(0,3), xytext=(0.2, 9), size=20,\
            va='center', ha='left',\
            arrowprops=dict(arrowstyle="simple",connectionstyle="arc3,rad=0.1"))
plt.show()

As aspected from most real world data, when participants are moving the data is normally distributed with some long tail.

In [13]:
# for plotting purposes taking datapoints of each activity to a different dataframe
df1 = data_train[data_train['Activity']=='STANDING']
df2 = data_train[data_train['Activity']=='SITTING']
df3 = data_train[data_train['Activity']=='LAYING']
df4 = data_train[data_train['Activity']=='WALKING']
df5 = data_train[data_train['Activity']=='WALKING_DOWNSTAIRS']
df6 = data_train[data_train['Activity']=='WALKING_UPSTAIRS']

plt.figure(figsize=(20,7))
plt.subplot(2,2,1)
plt.title('Stationary Activities(Zoomed in)')
sns.distplot(df4['tBodyAccMag-mean()'],color = 'r',hist = False, label = 'SITTING')
sns.distplot(df5['tBodyAccMag-mean()'],color = 'm',hist = False,label = 'STANDING')
sns.distplot(df6['tBodyAccMag-mean()'],color = 'c',hist = False, label = 'LAYING')
plt.axis([-1.08, -0.2, 0.1, 20])
plt.legend(loc='center')

plt.subplot(2,2,2)
plt.title('Moving Activities')
sns.distplot(df1['tBodyAccMag-mean()'],color = 'red',hist = False, label = 'WALKING')
sns.distplot(df2['tBodyAccMag-mean()'],color = 'blue',hist = False,label = 'WALING UP')
sns.distplot(df3['tBodyAccMag-mean()'],color = 'green',hist = False, label = 'WALKING DOWN')
plt.legend(loc='center right')


plt.tight_layout()
plt.show()

Maginuude of acceleration¶

In [14]:
plt.figure(figsize=(7,5))
sns.boxplot(x='Activity', y='tBodyAccMag-mean()',data=data_train, showfliers=False, saturation=1)
plt.ylabel('Acceleration Magnitude mean')
plt.axhline(y=-0.7, xmin=0.1, xmax=0.9,dashes=(5,5), c='b')
plt.axhline(y=-0.05, xmin=0.4, dashes=(5,5), c='g')
plt.xticks(rotation=90)
plt.show()

If tAccMean is < -0.8 then the Activities are either Standing or Sitting or Laying.

If tAccMean is > -0.6 then the Activities are either Walking or WalkingDownstairs or WalkingUpstairs.

If tAccMean > 0.0 then the Activity is WalkingDownstairs.

We can classify 75% the Acitivity labels with some errors.

Position of GravityAccelerationComponants also matters¶

In [15]:
sns.boxplot(x='Activity', y='angle(X,gravityMean)', data=data_train)
plt.axhline(y=0.08, xmin=0.1, xmax=0.9,c='m',dashes=(5,3))
plt.title('Angle between X-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation = 40)
plt.show()

If angleX,gravityMean > 0 then Activity is Laying.

We can classify all datapoints belonging to Laying activity with just a single if else statement with Grvity mean in 2 or 3 (X,Y or Z) dimensions.

In [16]:
sns.boxplot(x='Activity', y='angle(Y,gravityMean)', data = data_train, showfliers=False)
plt.title('Angle between Y-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation = 40)
plt.axhline(y=-0.22, xmin=0.1, xmax=0.8, dashes=(5,3), c='m')
plt.show()
In [17]:
sns.boxplot(x='Activity', y='angle(Z,gravityMean)', data = data_train, showfliers=False)
plt.title('Angle between Z-axis and Gravity_mean', fontsize=15)
plt.xticks(rotation = 40)
plt.axhline(y=-0.22, xmin=0.1, xmax=0.8, dashes=(5,3), c='m')
plt.show()
t-SNE on the data_train¶

The dataset is geared towards classifying the activity of the participant. Let us investigate the separability of the classes.

In [18]:
# performs t-sne with different perplexity values and their repective plots..

def perform_tsne(X_data, y_data, perplexities, n_iter=1000, img_name_prefix='t-sne'):
        
    for index,perplexity in enumerate(perplexities):
        # perform t-sne
        print('\nperforming tsne with perplexity {} and with {} iterations at max'.format(perplexity, n_iter))
        X_reduced = TSNE(verbose=2, perplexity=perplexity).fit_transform(X_data)
        print('Done..')
        
        # prepare the data for seaborn         
        print('Creating plot for this t-sne visualization..')
        df = pd.DataFrame({'x':X_reduced[:,0], 'y':X_reduced[:,1] ,'label':y_data})
        
        # draw the plot in appropriate place in the grid
        sns.lmplot(data=df, x='x', y='y', hue='label', fit_reg=False, height=8,\
                   palette="Set1",markers=['^','v','s','o', '1','2'])
        plt.title("perplexity : {} and max_iter : {}".format(perplexity, n_iter))
        img_name = img_name_prefix + '_perp_{}_iter_{}.png'.format(perplexity, n_iter)
        print('saving this plot as image in present working directory...')
        plt.savefig(img_name)
        plt.show()
        print('Done')
In [19]:
X_pre_tsne = data_train.drop(['subject', 'Activity'], axis=1)
y_pre_tsne = data_train['Activity']
perform_tsne(X_data = X_pre_tsne,y_data=y_pre_tsne, perplexities =[2,5,10,20,50])
performing tsne with perplexity 2 and with 1000 iterations at max
[t-SNE] Computing 7 nearest neighbors...
[t-SNE] Indexed 7352 samples in 0.001s...
[t-SNE] Computed neighbors for 7352 samples in 0.966s...
[t-SNE] Computed conditional probabilities for sample 1000 / 7352
[t-SNE] Computed conditional probabilities for sample 2000 / 7352
[t-SNE] Computed conditional probabilities for sample 3000 / 7352
[t-SNE] Computed conditional probabilities for sample 4000 / 7352
[t-SNE] Computed conditional probabilities for sample 5000 / 7352
[t-SNE] Computed conditional probabilities for sample 6000 / 7352
[t-SNE] Computed conditional probabilities for sample 7000 / 7352
[t-SNE] Computed conditional probabilities for sample 7352 / 7352
[t-SNE] Mean sigma: 0.597443
[t-SNE] Computed conditional probabilities in 0.016s
[t-SNE] Iteration 50: error = 124.6806946, gradient norm = 0.0277756 (50 iterations in 0.874s)
[t-SNE] Iteration 100: error = 106.6843567, gradient norm = 0.0275516 (50 iterations in 0.686s)
[t-SNE] Iteration 150: error = 100.5849457, gradient norm = 0.0193792 (50 iterations in 0.577s)
[t-SNE] Iteration 200: error = 97.2667542, gradient norm = 0.0154741 (50 iterations in 0.558s)
[t-SNE] Iteration 250: error = 95.0155640, gradient norm = 0.0142860 (50 iterations in 0.564s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 95.015564
[t-SNE] Iteration 300: error = 4.1172471, gradient norm = 0.0015668 (50 iterations in 0.558s)
[t-SNE] Iteration 350: error = 3.2064433, gradient norm = 0.0010139 (50 iterations in 0.565s)
[t-SNE] Iteration 400: error = 2.7765982, gradient norm = 0.0007163 (50 iterations in 0.583s)
[t-SNE] Iteration 450: error = 2.5120106, gradient norm = 0.0005610 (50 iterations in 0.560s)
[t-SNE] Iteration 500: error = 2.3279648, gradient norm = 0.0004774 (50 iterations in 0.581s)
[t-SNE] Iteration 550: error = 2.1895463, gradient norm = 0.0004144 (50 iterations in 0.577s)
[t-SNE] Iteration 600: error = 2.0799561, gradient norm = 0.0003655 (50 iterations in 0.611s)
[t-SNE] Iteration 650: error = 1.9897580, gradient norm = 0.0003316 (50 iterations in 0.601s)
[t-SNE] Iteration 700: error = 1.9139977, gradient norm = 0.0003000 (50 iterations in 0.627s)
[t-SNE] Iteration 750: error = 1.8488334, gradient norm = 0.0002775 (50 iterations in 0.611s)
[t-SNE] Iteration 800: error = 1.7920055, gradient norm = 0.0002585 (50 iterations in 0.617s)
[t-SNE] Iteration 850: error = 1.7420161, gradient norm = 0.0002386 (50 iterations in 0.610s)
[t-SNE] Iteration 900: error = 1.6971433, gradient norm = 0.0002239 (50 iterations in 0.603s)
[t-SNE] Iteration 950: error = 1.6568744, gradient norm = 0.0002103 (50 iterations in 0.616s)
[t-SNE] Iteration 1000: error = 1.6207337, gradient norm = 0.0001988 (50 iterations in 0.616s)
[t-SNE] KL divergence after 1000 iterations: 1.620734
Done..
Creating plot for this t-sne visualization..
saving this plot as image in present working directory...
Done

performing tsne with perplexity 5 and with 1000 iterations at max
[t-SNE] Computing 16 nearest neighbors...
[t-SNE] Indexed 7352 samples in 0.001s...
[t-SNE] Computed neighbors for 7352 samples in 0.921s...
[t-SNE] Computed conditional probabilities for sample 1000 / 7352
[t-SNE] Computed conditional probabilities for sample 2000 / 7352
[t-SNE] Computed conditional probabilities for sample 3000 / 7352
[t-SNE] Computed conditional probabilities for sample 4000 / 7352
[t-SNE] Computed conditional probabilities for sample 5000 / 7352
[t-SNE] Computed conditional probabilities for sample 6000 / 7352
[t-SNE] Computed conditional probabilities for sample 7000 / 7352
[t-SNE] Computed conditional probabilities for sample 7352 / 7352
[t-SNE] Mean sigma: 0.961446
[t-SNE] Computed conditional probabilities in 0.021s
[t-SNE] Iteration 50: error = 113.9493713, gradient norm = 0.0213829 (50 iterations in 0.947s)
[t-SNE] Iteration 100: error = 97.4634094, gradient norm = 0.0167576 (50 iterations in 0.687s)
[t-SNE] Iteration 150: error = 93.0681076, gradient norm = 0.0095275 (50 iterations in 0.685s)
[t-SNE] Iteration 200: error = 91.1163483, gradient norm = 0.0069787 (50 iterations in 0.583s)
[t-SNE] Iteration 250: error = 89.9588394, gradient norm = 0.0051896 (50 iterations in 0.621s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 89.958839
[t-SNE] Iteration 300: error = 3.5685205, gradient norm = 0.0014601 (50 iterations in 0.595s)
[t-SNE] Iteration 350: error = 2.8102651, gradient norm = 0.0007495 (50 iterations in 0.579s)
[t-SNE] Iteration 400: error = 2.4290500, gradient norm = 0.0005268 (50 iterations in 0.589s)
[t-SNE] Iteration 450: error = 2.2122097, gradient norm = 0.0004087 (50 iterations in 0.593s)
[t-SNE] Iteration 500: error = 2.0680881, gradient norm = 0.0003304 (50 iterations in 0.634s)
[t-SNE] Iteration 550: error = 1.9632096, gradient norm = 0.0002831 (50 iterations in 0.658s)
[t-SNE] Iteration 600: error = 1.8820696, gradient norm = 0.0002470 (50 iterations in 0.652s)
[t-SNE] Iteration 650: error = 1.8171486, gradient norm = 0.0002196 (50 iterations in 0.646s)
[t-SNE] Iteration 700: error = 1.7634370, gradient norm = 0.0001973 (50 iterations in 0.654s)
[t-SNE] Iteration 750: error = 1.7180336, gradient norm = 0.0001823 (50 iterations in 0.672s)
[t-SNE] Iteration 800: error = 1.6792035, gradient norm = 0.0001666 (50 iterations in 0.690s)
[t-SNE] Iteration 850: error = 1.6456238, gradient norm = 0.0001521 (50 iterations in 0.713s)
[t-SNE] Iteration 900: error = 1.6160247, gradient norm = 0.0001421 (50 iterations in 0.706s)
[t-SNE] Iteration 950: error = 1.5897765, gradient norm = 0.0001339 (50 iterations in 0.682s)
[t-SNE] Iteration 1000: error = 1.5662081, gradient norm = 0.0001271 (50 iterations in 0.690s)
[t-SNE] KL divergence after 1000 iterations: 1.566208
Done..
Creating plot for this t-sne visualization..
saving this plot as image in present working directory...
Done

performing tsne with perplexity 10 and with 1000 iterations at max
[t-SNE] Computing 31 nearest neighbors...
[t-SNE] Indexed 7352 samples in 0.001s...
[t-SNE] Computed neighbors for 7352 samples in 0.943s...
[t-SNE] Computed conditional probabilities for sample 1000 / 7352
[t-SNE] Computed conditional probabilities for sample 2000 / 7352
[t-SNE] Computed conditional probabilities for sample 3000 / 7352
[t-SNE] Computed conditional probabilities for sample 4000 / 7352
[t-SNE] Computed conditional probabilities for sample 5000 / 7352
[t-SNE] Computed conditional probabilities for sample 6000 / 7352
[t-SNE] Computed conditional probabilities for sample 7000 / 7352
[t-SNE] Computed conditional probabilities for sample 7352 / 7352
[t-SNE] Mean sigma: 1.133827
[t-SNE] Computed conditional probabilities in 0.038s
[t-SNE] Iteration 50: error = 106.0404510, gradient norm = 0.0164523 (50 iterations in 0.918s)
[t-SNE] Iteration 100: error = 90.9416046, gradient norm = 0.0103236 (50 iterations in 0.843s)
[t-SNE] Iteration 150: error = 87.7861252, gradient norm = 0.0074197 (50 iterations in 0.758s)
[t-SNE] Iteration 200: error = 86.4843750, gradient norm = 0.0050606 (50 iterations in 0.732s)
[t-SNE] Iteration 250: error = 85.7295151, gradient norm = 0.0029502 (50 iterations in 0.738s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 85.729515
[t-SNE] Iteration 300: error = 3.1488535, gradient norm = 0.0013967 (50 iterations in 0.696s)
[t-SNE] Iteration 350: error = 2.5036268, gradient norm = 0.0006513 (50 iterations in 0.696s)
[t-SNE] Iteration 400: error = 2.1824427, gradient norm = 0.0004233 (50 iterations in 0.692s)
[t-SNE] Iteration 450: error = 1.9976087, gradient norm = 0.0003158 (50 iterations in 0.604s)
[t-SNE] Iteration 500: error = 1.8784996, gradient norm = 0.0002529 (50 iterations in 0.622s)
[t-SNE] Iteration 550: error = 1.7943295, gradient norm = 0.0002119 (50 iterations in 0.616s)
[t-SNE] Iteration 600: error = 1.7313924, gradient norm = 0.0001855 (50 iterations in 0.625s)
[t-SNE] Iteration 650: error = 1.6817471, gradient norm = 0.0001620 (50 iterations in 0.627s)
[t-SNE] Iteration 700: error = 1.6421925, gradient norm = 0.0001455 (50 iterations in 0.624s)
[t-SNE] Iteration 750: error = 1.6096678, gradient norm = 0.0001317 (50 iterations in 0.643s)
[t-SNE] Iteration 800: error = 1.5822169, gradient norm = 0.0001194 (50 iterations in 0.633s)
[t-SNE] Iteration 850: error = 1.5590074, gradient norm = 0.0001108 (50 iterations in 0.612s)
[t-SNE] Iteration 900: error = 1.5387011, gradient norm = 0.0001041 (50 iterations in 0.630s)
[t-SNE] Iteration 950: error = 1.5213611, gradient norm = 0.0000998 (50 iterations in 0.628s)
[t-SNE] Iteration 1000: error = 1.5065804, gradient norm = 0.0000931 (50 iterations in 0.611s)
[t-SNE] KL divergence after 1000 iterations: 1.506580
Done..
Creating plot for this t-sne visualization..
saving this plot as image in present working directory...
Done

performing tsne with perplexity 20 and with 1000 iterations at max
[t-SNE] Computing 61 nearest neighbors...
[t-SNE] Indexed 7352 samples in 0.001s...
[t-SNE] Computed neighbors for 7352 samples in 0.952s...
[t-SNE] Computed conditional probabilities for sample 1000 / 7352
[t-SNE] Computed conditional probabilities for sample 2000 / 7352
[t-SNE] Computed conditional probabilities for sample 3000 / 7352
[t-SNE] Computed conditional probabilities for sample 4000 / 7352
[t-SNE] Computed conditional probabilities for sample 5000 / 7352
[t-SNE] Computed conditional probabilities for sample 6000 / 7352
[t-SNE] Computed conditional probabilities for sample 7000 / 7352
[t-SNE] Computed conditional probabilities for sample 7352 / 7352
[t-SNE] Mean sigma: 1.274336
[t-SNE] Computed conditional probabilities in 0.076s
[t-SNE] Iteration 50: error = 97.8272934, gradient norm = 0.0162839 (50 iterations in 0.912s)
[t-SNE] Iteration 100: error = 84.1415939, gradient norm = 0.0066233 (50 iterations in 0.728s)
[t-SNE] Iteration 150: error = 82.0585632, gradient norm = 0.0036928 (50 iterations in 0.652s)
[t-SNE] Iteration 200: error = 81.2350388, gradient norm = 0.0026874 (50 iterations in 0.726s)
[t-SNE] Iteration 250: error = 80.8051758, gradient norm = 0.0017583 (50 iterations in 0.667s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 80.805176
[t-SNE] Iteration 300: error = 2.7084403, gradient norm = 0.0013095 (50 iterations in 0.632s)
[t-SNE] Iteration 350: error = 2.1716480, gradient norm = 0.0005794 (50 iterations in 0.642s)
[t-SNE] Iteration 400: error = 1.9208679, gradient norm = 0.0003482 (50 iterations in 0.609s)
[t-SNE] Iteration 450: error = 1.7743288, gradient norm = 0.0002481 (50 iterations in 0.638s)
[t-SNE] Iteration 500: error = 1.6805768, gradient norm = 0.0001940 (50 iterations in 0.666s)
[t-SNE] Iteration 550: error = 1.6164960, gradient norm = 0.0001568 (50 iterations in 0.647s)
[t-SNE] Iteration 600: error = 1.5700113, gradient norm = 0.0001337 (50 iterations in 0.651s)
[t-SNE] Iteration 650: error = 1.5345597, gradient norm = 0.0001171 (50 iterations in 0.655s)
[t-SNE] Iteration 700: error = 1.5068108, gradient norm = 0.0001056 (50 iterations in 0.644s)
[t-SNE] Iteration 750: error = 1.4846615, gradient norm = 0.0000961 (50 iterations in 0.627s)
[t-SNE] Iteration 800: error = 1.4667996, gradient norm = 0.0000901 (50 iterations in 0.618s)
[t-SNE] Iteration 850: error = 1.4523740, gradient norm = 0.0000836 (50 iterations in 0.622s)
[t-SNE] Iteration 900: error = 1.4403598, gradient norm = 0.0000787 (50 iterations in 0.628s)
[t-SNE] Iteration 950: error = 1.4302145, gradient norm = 0.0000762 (50 iterations in 0.630s)
[t-SNE] Iteration 1000: error = 1.4214487, gradient norm = 0.0000730 (50 iterations in 0.631s)
[t-SNE] KL divergence after 1000 iterations: 1.421449
Done..
Creating plot for this t-sne visualization..
saving this plot as image in present working directory...
Done

performing tsne with perplexity 50 and with 1000 iterations at max
[t-SNE] Computing 151 nearest neighbors...
[t-SNE] Indexed 7352 samples in 0.001s...
[t-SNE] Computed neighbors for 7352 samples in 1.051s...
[t-SNE] Computed conditional probabilities for sample 1000 / 7352
[t-SNE] Computed conditional probabilities for sample 2000 / 7352
[t-SNE] Computed conditional probabilities for sample 3000 / 7352
[t-SNE] Computed conditional probabilities for sample 4000 / 7352
[t-SNE] Computed conditional probabilities for sample 5000 / 7352
[t-SNE] Computed conditional probabilities for sample 6000 / 7352
[t-SNE] Computed conditional probabilities for sample 7000 / 7352
[t-SNE] Computed conditional probabilities for sample 7352 / 7352
[t-SNE] Mean sigma: 1.437672
[t-SNE] Computed conditional probabilities in 0.185s
[t-SNE] Iteration 50: error = 86.5481644, gradient norm = 0.0213117 (50 iterations in 0.950s)
[t-SNE] Iteration 100: error = 75.5884323, gradient norm = 0.0042653 (50 iterations in 0.832s)
[t-SNE] Iteration 150: error = 74.6465530, gradient norm = 0.0025135 (50 iterations in 0.729s)
[t-SNE] Iteration 200: error = 74.2947617, gradient norm = 0.0014695 (50 iterations in 0.727s)
[t-SNE] Iteration 250: error = 74.1211166, gradient norm = 0.0013156 (50 iterations in 0.774s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 74.121117
[t-SNE] Iteration 300: error = 2.1523256, gradient norm = 0.0011806 (50 iterations in 0.737s)
[t-SNE] Iteration 350: error = 1.7555063, gradient norm = 0.0004877 (50 iterations in 0.707s)
[t-SNE] Iteration 400: error = 1.5864614, gradient norm = 0.0002822 (50 iterations in 0.695s)
[t-SNE] Iteration 450: error = 1.4929533, gradient norm = 0.0001889 (50 iterations in 0.701s)
[t-SNE] Iteration 500: error = 1.4331270, gradient norm = 0.0001403 (50 iterations in 0.716s)
[t-SNE] Iteration 550: error = 1.3917806, gradient norm = 0.0001125 (50 iterations in 0.732s)
[t-SNE] Iteration 600: error = 1.3625529, gradient norm = 0.0000948 (50 iterations in 0.721s)
[t-SNE] Iteration 650: error = 1.3411245, gradient norm = 0.0000826 (50 iterations in 0.703s)
[t-SNE] Iteration 700: error = 1.3254485, gradient norm = 0.0000744 (50 iterations in 0.698s)
[t-SNE] Iteration 750: error = 1.3139782, gradient norm = 0.0000698 (50 iterations in 0.722s)
[t-SNE] Iteration 800: error = 1.3053586, gradient norm = 0.0000627 (50 iterations in 0.718s)
[t-SNE] Iteration 850: error = 1.2985430, gradient norm = 0.0000643 (50 iterations in 0.759s)
[t-SNE] Iteration 900: error = 1.2934688, gradient norm = 0.0000586 (50 iterations in 0.721s)
[t-SNE] Iteration 950: error = 1.2890513, gradient norm = 0.0000548 (50 iterations in 0.733s)
[t-SNE] Iteration 1000: error = 1.2848508, gradient norm = 0.0000534 (50 iterations in 0.690s)
[t-SNE] KL divergence after 1000 iterations: 1.284851
Done..
Creating plot for this t-sne visualization..
saving this plot as image in present working directory...
Done

We can clearly see from the TSNE cluster, All the Activities can be cleanly seperated except "Standing" and "Sitting".

Principal Component Analysis¶
In [20]:
data_whole = pd.concat([data_train,data_test]) # combining both train and test data
data_whole= data_train.sample(frac=1) #The sample() method returns a specified number of random rows.
f_data = data_whole.loc[:,data_whole.columns.str.contains('^f|^s|^a|^A')]
t_data = data_whole.loc[:,data_whole.columns.str.contains('^t|^s|^a|^A')]

X = data_whole.drop(['subject','Activity'],axis=1) # dropping subject and activity column
y = data_whole['Activity'] # getting only label as y

X_f = f_data.drop(['subject','Activity'],axis=1) # taking frequency components
y_f = f_data['Activity']

X_t = t_data.drop(['subject','Activity'],axis=1) # taking time components
y_t = t_data['Activity']

#intializing PCA
pca = PCA(n_components=0.99)
pca.fit(X)
X_reduced = pca.transform(X)

#printing the #numbers 
print('Frequency components are {} Time components are {}'.format(X_f.shape[1],X.shape[1]-X_f.shape[1]))
print('Original components are {} Reduced components are {}'.format(X.shape[1],X_reduced.shape[1]))

#then splitting out data

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,
                                                 shuffle=True,random_state=444) # splitting original data
X_train_pca,X_test_pca,y_train_pca,y_test_pca = train_test_split(X_reduced,y,test_size=0.2,
                                                                 shuffle=True,random_state=444) # splitting pca data
X_train_f,X_test_f,y_train_f,y_test_f = train_test_split(X_f,y_f,test_size=0.2,
                                                         shuffle=True,
                                                         random_state=444) # splitting frequency components data
X_train_t,X_test_t,y_train_t,y_test_t = train_test_split(X_t,y_t,test_size=0.2,
                                                         shuffle=True,
                                                         random_state=444) # splitting time components data
Frequency components are 296 Time components are 265
Original components are 561 Reduced components are 155
In [21]:
# checking the shape of original split data
print('X_train and y_train : ({},{})'.format(X_train.shape, y_train.shape))
print('X_test  and y_test  : ({},{})'.format(X_test.shape, y_test.shape))
X_train and y_train : ((5881, 561),(5881,))
X_test  and y_test  : ((1471, 561),(1471,))

Modeling¶

Labels for confusion Matirx

In [22]:
labels=['LAYING', 'SITTING','STANDING','WALKING','WALKING_DOWNSTAIRS','WALKING_UPSTAIRS']

Function to plot the confusion matrix

In [23]:
plt.rcParams['font.family'] = 'sans-serif'

def plot_confusion_matrix(cm,classes,normalize=False,title='Confusion Matrix',cmap=plt.cm.RdYlGn):
    if normalize:
        clm=cm.astype('float')/cm.sum(axis=1)[:,np.newaxis]
        
    plt.imshow(clm,interpolation='nearest',cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks,classes,rotation=90)
    plt.yticks(tick_marks,classes)
    
    fmt= '.2f' if normalize else 'd'
    thresh = cm.max()/2.
    for i in range (cm.shape[0]):
        for j in range (cm.shape[1]):
            plt.text(j,i,format(clm[i,j],fmt),horizontalalignment='center',color='white' if clm[i,j] > thresh 
                     else 'black')
        
    plt.tight_layout()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')

Function to run models

In [24]:
from datetime import datetime 
def perform_model(model,X_train,y_train,X_test,y_test,class_labels,cm_normalize=True,
                  print_cm=True,cm_map=plt.cm.Greens):
    
    # creating a dictionary to store various results
    results = dict()
    
    #time at which model stat runnig
    train_start_time = datetime.now()
    print('training the model ...')
    model.fit(X_train,y_train)
    print('Done..!\n')
    train_end_time = datetime.now()
    results['training_time'] = train_end_time - train_start_time
    print('--> training time -{}\n'.format(results['training_time']))
    
    #pridicting test data
    print('Predicting test data')
    test_start_time = datetime.now()
    y_pred = model.predict(X_test)
    test_end_time = datetime.now()
    print('Done..!\n')
    results['testing_time'] = test_end_time-test_start_time
    print('--> testing time -{}'.format(results['testing_time']))
    
    # calculating overall accuracy of the model
    accuracy = metrics.accuracy_score(y_true=y_test,y_pred=y_pred)
    #store accuracy in results
    results['accuracy'] = accuracy
    print('--> accuracy -{}\n'.format(accuracy))
    
    #confusion matrix
    cm = metrics.confusion_matrix(y_test,y_pred)
    results['confusion_matrix'] = cm
    if print_cm:
        print('\n -------------Confusion Matirx---------------')
        print('\n{}'.format(cm))
        
    #plot confusion matrix
    plt.figure(figsize=(8,8))
    plt.grid(b=False)
    plot_confusion_matrix(cm,classes=class_labels,normalize=True,title='Normalized Comfusion Matrix',cmap=cm_map)
    plt.show()
    
    # getting classification report
    print('--------------------| Classification Report |-------------------')
    classification_report = metrics.classification_report(y_test,y_pred)
    
    # storing report in results 
    results['Classification_report'] = classification_report
    print(classification_report)
    
    # adding the trained model to the results
    results['Model'] = model
    
    return results

Function to print the grid search parameters

In [25]:
def print_grid_search_attributes(model):
    
    #Estimator that have highest score from all the estimators
    print('\n\n--> Best Estimator:')
    print('\t{}\n'.format(model.best_estimator_))
    
    #best result parameter
    print('\n--> Best Parameter')
    print('\tBest Estimator Parameter :{}'.format(model.best_params_))
    
    # number of cross validation split
    print('\n Number of CrossValidation sets:')
    print('\t Total number of cross validation sets: {}'.format(model.n_splits_))
    
    #Average cross vaidation score of the best estimator
    print('\n--> Best Score:')
    print('\tAverage Cross Validation score of best estimator: {}'.format(model.best_score_))
    

Logistic Regression with Grid Search

In [26]:
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=ConvergenceWarning)

from sklearn.model_selection import GridSearchCV
# starting the grid 
param_grid = {'C':[0.01,0.1,1,10,20,30],'penalty':['l2','l1']}
log_reg = LogisticRegression()
log_reg_grid = GridSearchCV(log_reg,param_grid=param_grid,cv=3,verbose=1,n_jobs=-1)
log_reg_grid_results= perform_model(log_reg_grid,X_train,y_train,X_test,y_test,class_labels=labels)

# cheking the attributes of the modelfi
print_grid_search_attributes(log_reg_grid_results['Model'])
training the model ...
Fitting 3 folds for each of 12 candidates, totalling 36 fits
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_logistic.py:814: ConvergenceWarning: lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Done..!

--> training time -0:00:05.934872

Predicting test data
Done..!

--> testing time -0:00:00.003633
--> accuracy -0.9789259007477906


 -------------Confusion Matirx---------------

[[304   0   0   0   0   1]
 [  1 252  14   0   0   1]
 [  0   9 275   0   0   0]
 [  0   0   0 217   0   1]
 [  0   0   0   1 194   1]
 [  0   0   0   1   1 198]]
--------------------| Classification Report |-------------------
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       305
           SITTING       0.97      0.94      0.95       268
          STANDING       0.95      0.97      0.96       284
           WALKING       0.99      1.00      0.99       218
WALKING_DOWNSTAIRS       0.99      0.99      0.99       196
  WALKING_UPSTAIRS       0.98      0.99      0.99       200

          accuracy                           0.98      1471
         macro avg       0.98      0.98      0.98      1471
      weighted avg       0.98      0.98      0.98      1471



--> Best Estimator:
	LogisticRegression(C=10)


--> Best Parameter
	Best Estimator Parameter :{'C': 10, 'penalty': 'l2'}

 Number of CrossValidation sets:
	 Total number of cross validation sets: 3

--> Best Score:
	Average Cross Validation score of best estimator: 0.9806156271789694

Linear SVC

In [27]:
from sklearn.svm import LinearSVC
parameters = {'C':[0.125, 0.5, 1, 2, 8, 16]}
lr_svc = LinearSVC(tol=0.00005)
lr_svc_grid = GridSearchCV(lr_svc, param_grid=parameters, n_jobs=-1, verbose=1)
lr_svc_grid_results = perform_model(lr_svc_grid, X_train, y_train, X_test, y_test, class_labels=labels)

# observe the attributes of the model 
print_grid_search_attributes(lr_svc_grid_results['Model'])
training the model ...
Fitting 5 folds for each of 6 candidates, totalling 30 fits
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
Done..!

--> training time -0:00:12.230188

Predicting test data
Done..!

--> testing time -0:00:00.003524
--> accuracy -0.9857239972807614


 -------------Confusion Matirx---------------

[[305   0   0   0   0   0]
 [  0 253  15   0   0   0]
 [  0   6 278   0   0   0]
 [  0   0   0 218   0   0]
 [  0   0   0   0 196   0]
 [  0   0   0   0   0 200]]
--------------------| Classification Report |-------------------
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       305
           SITTING       0.98      0.94      0.96       268
          STANDING       0.95      0.98      0.96       284
           WALKING       1.00      1.00      1.00       218
WALKING_DOWNSTAIRS       1.00      1.00      1.00       196
  WALKING_UPSTAIRS       1.00      1.00      1.00       200

          accuracy                           0.99      1471
         macro avg       0.99      0.99      0.99      1471
      weighted avg       0.99      0.99      0.99      1471



--> Best Estimator:
	LinearSVC(C=1, tol=5e-05)


--> Best Parameter
	Best Estimator Parameter :{'C': 1}

 Number of CrossValidation sets:
	 Total number of cross validation sets: 5

--> Best Score:
	Average Cross Validation score of best estimator: 0.9855468185574994

Kernal SVM

In [28]:
from sklearn.svm import SVC
parameters = {'C':[2,8,16],\
              'gamma': [ 0.0078125, 0.125, 2]}
rbf_svm = SVC(kernel='rbf')
rbf_svm_grid = GridSearchCV(rbf_svm,param_grid=parameters, n_jobs=-1)
rbf_svm_grid_results = perform_model(rbf_svm_grid, X_train, y_train, X_test, y_test, class_labels=labels)

# observe the attributes of the model 
print_grid_search_attributes(rbf_svm_grid_results['Model'])        
training the model ...
Done..!

--> training time -0:01:18.655584

Predicting test data
Done..!

--> testing time -0:00:00.399466
--> accuracy -0.9898028552005439


 -------------Confusion Matirx---------------

[[305   0   0   0   0   0]
 [  0 260   7   0   0   1]
 [  0   7 277   0   0   0]
 [  0   0   0 218   0   0]
 [  0   0   0   0 196   0]
 [  0   0   0   0   0 200]]
--------------------| Classification Report |-------------------
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       305
           SITTING       0.97      0.97      0.97       268
          STANDING       0.98      0.98      0.98       284
           WALKING       1.00      1.00      1.00       218
WALKING_DOWNSTAIRS       1.00      1.00      1.00       196
  WALKING_UPSTAIRS       1.00      1.00      1.00       200

          accuracy                           0.99      1471
         macro avg       0.99      0.99      0.99      1471
      weighted avg       0.99      0.99      0.99      1471



--> Best Estimator:
	SVC(C=16, gamma=0.0078125)


--> Best Parameter
	Best Estimator Parameter :{'C': 16, 'gamma': 0.0078125}

 Number of CrossValidation sets:
	 Total number of cross validation sets: 5

--> Best Score:
	Average Cross Validation score of best estimator: 0.9870774308023975

Decision Tree with Grid Search

In [29]:
parameters = {'max_depth':np.arange(3,10,2)}
dt = DecisionTreeClassifier()
dt_grid = GridSearchCV(dt,param_grid=parameters, n_jobs=-1)
dt_grid_results = perform_model(dt_grid, X_train, y_train, X_test, y_test, class_labels=labels)

# observe the attributes of the model 
print_grid_search_attributes(dt_grid_results['Model'])
training the model ...
Done..!

--> training time -0:00:04.795516

Predicting test data
Done..!

--> testing time -0:00:00.003643
--> accuracy -0.9401767505098573


 -------------Confusion Matirx---------------

[[305   0   0   0   0   0]
 [  0 247  20   1   0   0]
 [  0  21 263   0   0   0]
 [  0   0   0 204   5   9]
 [  0   0   0   6 182   8]
 [  0   0   0   9   9 182]]
--------------------| Classification Report |-------------------
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       305
           SITTING       0.92      0.92      0.92       268
          STANDING       0.93      0.93      0.93       284
           WALKING       0.93      0.94      0.93       218
WALKING_DOWNSTAIRS       0.93      0.93      0.93       196
  WALKING_UPSTAIRS       0.91      0.91      0.91       200

          accuracy                           0.94      1471
         macro avg       0.94      0.94      0.94      1471
      weighted avg       0.94      0.94      0.94      1471



--> Best Estimator:
	DecisionTreeClassifier(max_depth=9)


--> Best Parameter
	Best Estimator Parameter :{'max_depth': 9}

 Number of CrossValidation sets:
	 Total number of cross validation sets: 5

--> Best Score:
	Average Cross Validation score of best estimator: 0.9420164837387801

Random Forest Classifier

In [30]:
params = {'n_estimators': np.arange(10,201,20), 'max_depth':np.arange(3,15,2)}
rfc = RandomForestClassifier()
rfc_grid = GridSearchCV(rfc, param_grid=params, n_jobs=-1)
rfc_grid_results = perform_model(rfc_grid, X_train, y_train, X_test, y_test, class_labels=labels)

# observe the attributes of the model 
print_grid_search_attributes(rfc_grid_results['Model'])
training the model ...
Done..!

--> training time -0:02:17.285373

Predicting test data
Done..!

--> testing time -0:00:00.029575
--> accuracy -0.9768864717878993


 -------------Confusion Matirx---------------

[[304   0   0   0   0   1]
 [  0 257  10   0   0   1]
 [  0  12 272   0   0   0]
 [  0   0   0 213   2   3]
 [  0   0   0   0 192   4]
 [  0   0   0   0   1 199]]
--------------------| Classification Report |-------------------
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       305
           SITTING       0.96      0.96      0.96       268
          STANDING       0.96      0.96      0.96       284
           WALKING       1.00      0.98      0.99       218
WALKING_DOWNSTAIRS       0.98      0.98      0.98       196
  WALKING_UPSTAIRS       0.96      0.99      0.98       200

          accuracy                           0.98      1471
         macro avg       0.98      0.98      0.98      1471
      weighted avg       0.98      0.98      0.98      1471



--> Best Estimator:
	RandomForestClassifier(max_depth=13, n_estimators=150)


--> Best Parameter
	Best Estimator Parameter :{'max_depth': 13, 'n_estimators': 150}

 Number of CrossValidation sets:
	 Total number of cross validation sets: 5

--> Best Score:
	Average Cross Validation score of best estimator: 0.9787448199330709

Gradient Boosted Decision Trees With GridSearch

In [31]:
from sklearn.ensemble import GradientBoostingClassifier
param_grid = {'max_depth': np.arange(5),'n_estimators':np.arange(140)}
gbdt = GradientBoostingClassifier()
gbdt_grid = GridSearchCV(gbdt, param_grid=param_grid, n_jobs=-1)
gbdt_grid_results = perform_model(gbdt_grid, X_train, y_train, X_test, y_test, class_labels=labels)

# observe the attributes of the model 
print_grid_search_attributes(gbdt_grid_results['Model'])
training the model ...
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Input In [31], in <cell line: 5>()
      3 gbdt = GradientBoostingClassifier()
      4 gbdt_grid = GridSearchCV(gbdt, param_grid=param_grid, n_jobs=-1)
----> 5 gbdt_grid_results = perform_model(gbdt_grid, X_train, y_train, X_test, y_test, class_labels=labels)
      7 # observe the attributes of the model 
      8 print_grid_search_attributes(gbdt_grid_results['Model'])

Input In [24], in perform_model(model, X_train, y_train, X_test, y_test, class_labels, cm_normalize, print_cm, cm_map)
      8 train_start_time = datetime.now()
      9 print('training the model ...')
---> 10 model.fit(X_train,y_train)
     11 print('Done..!\n')
     12 train_end_time = datetime.now()

File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_search.py:891, in BaseSearchCV.fit(self, X, y, groups, **fit_params)
    885     results = self._format_results(
    886         all_candidate_params, n_splits, all_out, all_more_results
    887     )
    889     return results
--> 891 self._run_search(evaluate_candidates)
    893 # multimetric is determined here because in the case of a callable
    894 # self.scoring the return type is only known after calling
    895 first_test_score = all_out[0]["test_scores"]

File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_search.py:1392, in GridSearchCV._run_search(self, evaluate_candidates)
   1390 def _run_search(self, evaluate_candidates):
   1391     """Search all candidates in param_grid"""
-> 1392     evaluate_candidates(ParameterGrid(self.param_grid))

File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/model_selection/_search.py:838, in BaseSearchCV.fit.<locals>.evaluate_candidates(candidate_params, cv, more_results)
    830 if self.verbose > 0:
    831     print(
    832         "Fitting {0} folds for each of {1} candidates,"
    833         " totalling {2} fits".format(
    834             n_splits, n_candidates, n_candidates * n_splits
    835         )
    836     )
--> 838 out = parallel(
    839     delayed(_fit_and_score)(
    840         clone(base_estimator),
    841         X,
    842         y,
    843         train=train,
    844         test=test,
    845         parameters=parameters,
    846         split_progress=(split_idx, n_splits),
    847         candidate_progress=(cand_idx, n_candidates),
    848         **fit_and_score_kwargs,
    849     )
    850     for (cand_idx, parameters), (split_idx, (train, test)) in product(
    851         enumerate(candidate_params), enumerate(cv.split(X, y, groups))
    852     )
    853 )
    855 if len(out) < 1:
    856     raise ValueError(
    857         "No fits were performed. "
    858         "Was the CV iterator empty? "
    859         "Were there no candidates?"
    860     )

File ~/opt/anaconda3/lib/python3.9/site-packages/joblib/parallel.py:1056, in Parallel.__call__(self, iterable)
   1053     self._iterating = False
   1055 with self._backend.retrieval_context():
-> 1056     self.retrieve()
   1057 # Make sure that we get a last message telling us we are done
   1058 elapsed_time = time.time() - self._start_time

File ~/opt/anaconda3/lib/python3.9/site-packages/joblib/parallel.py:935, in Parallel.retrieve(self)
    933 try:
    934     if getattr(self._backend, 'supports_timeout', False):
--> 935         self._output.extend(job.get(timeout=self.timeout))
    936     else:
    937         self._output.extend(job.get())

File ~/opt/anaconda3/lib/python3.9/site-packages/joblib/_parallel_backends.py:542, in LokyBackend.wrap_future_result(future, timeout)
    539 """Wrapper for Future.result to implement the same behaviour as
    540 AsyncResults.get from multiprocessing."""
    541 try:
--> 542     return future.result(timeout=timeout)
    543 except CfTimeoutError as e:
    544     raise TimeoutError from e

File ~/opt/anaconda3/lib/python3.9/concurrent/futures/_base.py:441, in Future.result(self, timeout)
    438 elif self._state == FINISHED:
    439     return self.__get_result()
--> 441 self._condition.wait(timeout)
    443 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
    444     raise CancelledError()

File ~/opt/anaconda3/lib/python3.9/threading.py:312, in Condition.wait(self, timeout)
    310 try:    # restore state no matter what (e.g., KeyboardInterrupt)
    311     if timeout is None:
--> 312         waiter.acquire()
    313         gotit = True
    314     else:

KeyboardInterrupt: 
In [39]:
from sklearn.neighbors import KNeighborsClassifier
param_grid = {'n_neighbors':[1,10,30,50,80],'weights':['uniform','distance'],'metric':['euclidean','manhattan']}
knc = KNeighborsClassifier()
knc_grid = GridSearchCV(knc, param_grid=param_grid, n_jobs=-1)
knc_grid_results = perform_model(knc_grid, X_train, y_train, X_test, y_test, class_labels=labels)

# observe the attributes of the model 
print_grid_search_attributes(knc_grid_results['Model'])
training the model ...
Done..!

--> training time -0:00:37.610952

Predicting test data
Done..!

--> testing time -0:00:03.292385
--> accuracy -0.9830047586675731


 -------------Confusion Matirx---------------

[[305   0   0   0   0   0]
 [  0 254  13   0   0   1]
 [  0  11 273   0   0   0]
 [  0   0   0 218   0   0]
 [  0   0   0   0 196   0]
 [  0   0   0   0   0 200]]
--------------------| Classification Report |-------------------
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       305
           SITTING       0.96      0.95      0.95       268
          STANDING       0.95      0.96      0.96       284
           WALKING       1.00      1.00      1.00       218
WALKING_DOWNSTAIRS       1.00      1.00      1.00       196
  WALKING_UPSTAIRS       1.00      1.00      1.00       200

          accuracy                           0.98      1471
         macro avg       0.98      0.98      0.98      1471
      weighted avg       0.98      0.98      0.98      1471



--> Best Estimator:
	KNeighborsClassifier(metric='manhattan', n_neighbors=1)


--> Best Parameter
	Best Estimator Parameter :{'metric': 'manhattan', 'n_neighbors': 1, 'weights': 'uniform'}

 Number of CrossValidation sets:
	 Total number of cross validation sets: 5

--> Best Score:
	Average Cross Validation score of best estimator: 0.9811259168068247

Naive Bayes

In [40]:
from sklearn.naive_bayes import GaussianNB
param_grid = {'var_smoothing': np.logspace(0,-9, num=100)}
gnb = GaussianNB()
gnb_grid = GridSearchCV(gnb, param_grid=param_grid, n_jobs=-1)
gnb_grid_results = perform_model(gnb_grid, X_train, y_train, X_test, y_test, class_labels=labels)

# observe the attributes of the model 
print_grid_search_attributes(gnb_grid_results['Model'])
training the model ...
Done..!

--> training time -0:00:06.445757

Predicting test data
Done..!

--> testing time -0:00:00.012651
--> accuracy -0.8300475866757308


 -------------Confusion Matirx---------------

[[301   0   0   0   0   4]
 [  4 151 112   0   0   1]
 [  0   2 281   0   0   1]
 [  0   0   0 170  13  35]
 [  0   0   0  18 138  40]
 [  0   0   0  14   6 180]]
--------------------| Classification Report |-------------------
                    precision    recall  f1-score   support

            LAYING       0.99      0.99      0.99       305
           SITTING       0.99      0.56      0.72       268
          STANDING       0.72      0.99      0.83       284
           WALKING       0.84      0.78      0.81       218
WALKING_DOWNSTAIRS       0.88      0.70      0.78       196
  WALKING_UPSTAIRS       0.69      0.90      0.78       200

          accuracy                           0.83      1471
         macro avg       0.85      0.82      0.82      1471
      weighted avg       0.86      0.83      0.83      1471



--> Best Estimator:
	GaussianNB(var_smoothing=0.0657933224657568)


--> Best Parameter
	Best Estimator Parameter :{'var_smoothing': 0.0657933224657568}

 Number of CrossValidation sets:
	 Total number of cross validation sets: 5

--> Best Score:
	Average Cross Validation score of best estimator: 0.8294467659621197
In [44]:
# ***************************** Print Accuracy and Error ******************************

print('\n                     Accuracy     Error')
print('                     ----------   --------')
print('Logistic Regression : {:.04}%       {:.04}%'.format(log_reg_grid_results['accuracy'] * 100,\
                                                  100-(log_reg_grid_results['accuracy'] * 100)))

print('Linear SVC          : {:.04}%       {:.04}% '.format(lr_svc_grid_results['accuracy'] * 100,\
                                                        100-(lr_svc_grid_results['accuracy'] * 100)))

print('rbf SVM classifier  : {:.04}%      {:.04}% '.format(rbf_svm_grid_results['accuracy'] * 100,\
                                                          100-(rbf_svm_grid_results['accuracy'] * 100)))

print('DecisionTree        : {:.04}%      {:.04}% '.format(dt_grid_results['accuracy'] * 100,\
                                                        100-(dt_grid_results['accuracy'] * 100)))

print('Random Forest       : {:.04}%      {:.04}% '.format(rfc_grid_results['accuracy'] * 100,\
                                                           100-(rfc_grid_results['accuracy'] * 100)))

print('KNeighborsclssifier : {:.04}%      {:.04}% '.format(knc_grid_results['accuracy'] * 100,\
                                                           100-(knc_grid_results['accuracy'] * 100)))

print('Naive Bayes         : {:.04}%      {:.04}% '.format(gnb_grid_results['accuracy'] * 100,\
                                                           100-(gnb_grid_results['accuracy'] * 100)))
                     Accuracy     Error
                     ----------   --------
Logistic Regression : 97.89%       2.107%
Linear SVC          : 98.57%       1.428% 
rbf SVM classifier  : 98.98%      1.02% 
DecisionTree        : 94.02%      5.982% 
Random Forest       : 97.69%      2.311% 
KNeighborsclssifier : 98.3%      1.7% 
Naive Bayes         : 83.0%      17.0% 
In [45]:
kernal_evals = dict()
def evaluate_classification(model, name, X_train, X_test, y_train, y_test):
    train_accuracy = metrics.accuracy_score(y_train, model.predict(X_train))
    test_accuracy = metrics.accuracy_score(y_test, model.predict(X_test))
    
    train_precision = metrics.precision_score(y_train, model.predict(X_train), average='micro')
    test_precision = metrics.precision_score(y_test, model.predict(X_test), average='micro')
    
    train_recall = metrics.recall_score(y_train, model.predict(X_train), average='micro')
    test_recall = metrics.recall_score(y_test, model.predict(X_test), average='micro')
    
    kernal_evals[str(name)] = [train_accuracy, test_accuracy, train_precision, test_precision, train_recall, test_recall]
    print("Training Accuracy " + str(name) + " {}  Test Accuracy ".format(train_accuracy*100) + str(name) + " {}".format(test_accuracy*100))
    print("Training Precesion " + str(name) + " {}  Test Precesion ".format(train_precision*100) + str(name) + " {}".format(test_precision*100))
    print("Training Recall " + str(name) + " {}  Test Recall ".format(train_recall*100) + str(name) + " {}".format(test_recall*100))
    
    actual = y_test
    predicted = model.predict(X_test)
    confusion_matrix = metrics.confusion_matrix(actual, predicted)
    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = ['LAYING', 'SITTING', 'STANDING', 'WALKING', 'DOWNSTAIRS',
       'UPSTAIRS'])

    fig, ax = plt.subplots(figsize=(10,10))
    ax.grid(False)
    cm_display.plot(ax=ax)
In [48]:
le = preprocessing.LabelEncoder().fit(y_train)
y_xgb_train = le.transform(y_train)
y_xgb_test = le.transform(y_test)

xgb_params = {'n_estimators': 100,
              'learning_rate': 0.2,
              'subsample': 0.927,
              'colsample_bytree': 0.88,
              'max_depth': 5,
              'booster': 'gbtree', 
              'reg_lambda': 38,
              'reg_alpha': 32,
              'random_state': 12}
xgb = XGBClassifier(**xgb_params).fit(X_train, y_xgb_train)
evaluate_classification(xgb, "XGBClassifier", X_train, X_test, y_xgb_train, y_xgb_test)
Training Accuracy XGBClassifier 98.84373405883353  Test Accuracy XGBClassifier 96.87287559483345
Training Precesion XGBClassifier 98.84373405883353  Test Precesion XGBClassifier 96.87287559483345
Training Recall XGBClassifier 98.84373405883353  Test Recall XGBClassifier 96.87287559483345

from the above results we can see that our best model for the problem statement is either 'Linear SVC' or 'rbf SVM Classifier'.¶

In [49]:
from sklearn.svm import LinearSVC
parameters = {'C':[0.125, 0.5, 1, 2, 8, 16]}
lr_svc_pca = LinearSVC(tol=0.00005)
lr_svc_pca_grid = GridSearchCV(lr_svc_pca, param_grid=parameters, n_jobs=-1, verbose=1)
lr_svc_grid_pca_results = perform_model(lr_svc_pca_grid, X_train_pca, y_train_pca, X_test_pca,
                                        y_test_pca, class_labels=labels)

# observe the attributes of the model 
print_grid_search_attributes(lr_svc_grid_pca_results['Model'])
training the model ...
Fitting 5 folds for each of 6 candidates, totalling 30 fits
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
Done..!

--> training time -0:00:04.541936

Predicting test data
Done..!

--> testing time -0:00:00.001423
--> accuracy -0.9809653297076818


 -------------Confusion Matirx---------------

[[305   0   0   0   0   0]
 [  0 251  17   0   0   0]
 [  0   9 275   0   0   0]
 [  0   0   0 218   0   0]
 [  0   0   0   1 195   0]
 [  0   0   0   1   0 199]]
--------------------| Classification Report |-------------------
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       305
           SITTING       0.97      0.94      0.95       268
          STANDING       0.94      0.97      0.95       284
           WALKING       0.99      1.00      1.00       218
WALKING_DOWNSTAIRS       1.00      0.99      1.00       196
  WALKING_UPSTAIRS       1.00      0.99      1.00       200

          accuracy                           0.98      1471
         macro avg       0.98      0.98      0.98      1471
      weighted avg       0.98      0.98      0.98      1471



--> Best Estimator:
	LinearSVC(C=2, tol=5e-05)


--> Best Parameter
	Best Estimator Parameter :{'C': 2}

 Number of CrossValidation sets:
	 Total number of cross validation sets: 5

--> Best Score:
	Average Cross Validation score of best estimator: 0.9841858408614081
In [53]:
from sklearn.svm import LinearSVC
parameters = {'C':[0.125, 0.5, 1, 2, 8, 16]}
lr_svc_f = LinearSVC(tol=0.00005)
lr_svc_f_grid = GridSearchCV(lr_svc_f, param_grid=parameters, n_jobs=-1, verbose=1)
lr_svc_grid_f_results = perform_model(lr_svc_f_grid, X_train_f, y_train_f, X_test_f, y_test_f, class_labels=labels)

# observe the attributes of the model 
print_grid_search_attributes(lr_svc_grid_f_results['Model'])
training the model ...
Fitting 5 folds for each of 6 candidates, totalling 30 fits
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
Done..!

--> training time -0:00:11.356503

Predicting test data
Done..!

--> testing time -0:00:00.002339
--> accuracy -0.9605710401087696


 -------------Confusion Matirx---------------

[[305   0   0   0   0   0]
 [  4 228  35   0   0   1]
 [  0  10 274   0   0   0]
 [  0   0   0 215   0   3]
 [  0   0   0   1 195   0]
 [  0   0   0   3   1 196]]
--------------------| Classification Report |-------------------
                    precision    recall  f1-score   support

            LAYING       0.99      1.00      0.99       305
           SITTING       0.96      0.85      0.90       268
          STANDING       0.89      0.96      0.92       284
           WALKING       0.98      0.99      0.98       218
WALKING_DOWNSTAIRS       0.99      0.99      0.99       196
  WALKING_UPSTAIRS       0.98      0.98      0.98       200

          accuracy                           0.96      1471
         macro avg       0.96      0.96      0.96      1471
      weighted avg       0.96      0.96      0.96      1471



--> Best Estimator:
	LinearSVC(C=8, tol=5e-05)


--> Best Parameter
	Best Estimator Parameter :{'C': 8}

 Number of CrossValidation sets:
	 Total number of cross validation sets: 5

--> Best Score:
	Average Cross Validation score of best estimator: 0.969222455337275
In [54]:
from sklearn.svm import LinearSVC
parameters = {'C':[0.125, 0.5, 1, 2, 8, 16]}
lr_svc_t = LinearSVC(tol=0.00005)
lr_svc_t_grid = GridSearchCV(lr_svc_t, param_grid=parameters, n_jobs=-1, verbose=1)
lr_svc_grid_t_results = perform_model(lr_svc_t_grid, X_train_t, y_train_t, X_test_t, y_test_t, class_labels=labels)

# observe the attributes of the model 
print_grid_search_attributes(lr_svc_grid_t_results['Model'])
training the model ...
Fitting 5 folds for each of 6 candidates, totalling 30 fits
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
/Users/harshitchandrol/opt/anaconda3/lib/python3.9/site-packages/sklearn/svm/_base.py:1206: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.
  warnings.warn(
Done..!

--> training time -0:00:04.743023

Predicting test data
Done..!

--> testing time -0:00:00.003701
--> accuracy -0.9870836165873556


 -------------Confusion Matirx---------------

[[305   0   0   0   0   0]
 [  0 255  13   0   0   0]
 [  0   6 278   0   0   0]
 [  0   0   0 218   0   0]
 [  0   0   0   0 196   0]
 [  0   0   0   0   0 200]]
--------------------| Classification Report |-------------------
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       305
           SITTING       0.98      0.95      0.96       268
          STANDING       0.96      0.98      0.97       284
           WALKING       1.00      1.00      1.00       218
WALKING_DOWNSTAIRS       1.00      1.00      1.00       196
  WALKING_UPSTAIRS       1.00      1.00      1.00       200

          accuracy                           0.99      1471
         macro avg       0.99      0.99      0.99      1471
      weighted avg       0.99      0.99      0.99      1471



--> Best Estimator:
	LinearSVC(C=0.5, tol=5e-05)


--> Best Parameter
	Best Estimator Parameter :{'C': 0.5}

 Number of CrossValidation sets:
	 Total number of cross validation sets: 5

--> Best Score:
	Average Cross Validation score of best estimator: 0.9865670822279634
In [12]:
print('Linear SVC PCA       : {:.04}%      {:.04}% '.format(lr_svc_grid_pca_results['accuracy'] * 100,\
                                                           100-(lr_svc_grid_pca_results['accuracy'] * 100)))

print('Linear SVC Freqeuncy        : {:.04}%      {:.04}% '.format(lr_svc_grid_f_results['accuracy'] * 100,\
                                                           100-(lr_svc_grid_f_results['accuracy'] * 100)))

print('Linear SVC Time       : {:.04}%      {:.04}% '.format(lr_svc_grid_t_results['accuracy'] * 100,\
                                                           100-(lr_svc_grid_t_results['accuracy'] * 100)))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Input In [12], in <cell line: 1>()
----> 1 print('Linear SVC PCA       : {:.04}%      {:.04}% '.format(lr_svc_grid_pca_results['accuracy'] * 100,\
      2                                                            100-(lr_svc_grid_pca_results['accuracy'] * 100)))
      4 print('Linear SVC Freqeuncy        : {:.04}%      {:.04}% '.format(lr_svc_grid_f_results['accuracy'] * 100,\
      5                                                            100-(lr_svc_grid_f_results['accuracy'] * 100)))
      7 print('Linear SVC Time       : {:.04}%      {:.04}% '.format(lr_svc_grid_t_results['accuracy'] * 100,\
      8                                                            100-(lr_svc_grid_t_results['accuracy'] * 100)))

NameError: name 'lr_svc_grid_pca_results' is not defined
In [65]:
barplot = pd.DataFrame({'Accuracy': [97.89,98.1,96.06,98.71],
                         'Error': [2.107,1.903,3.943,1.292]},
                        index=['Linear SVC','Linear SVC(PCA)','Linear SVC Freqyency','Linear SVC Time'])

barplot.plot(kind='bar', stacked=True, color=['skyblue', 'red'])

# Add Title and Labels
plt.title('Accuray & Error Scores %')
plt.ylabel('Accuracy')
Out[65]:
Text(0, 0.5, 'Accuracy')